
/*******************************************************************************/
/*  
	This script cleans the CPS data downloaded from IPUMS
		- March 2019 Annual Social and Economic Supplement
		- Monthly CPS  Feb, April, Nov 2010-2020
		- Covid module May 2020 - Nov 2020
		- Monthly CPS: Great Recession and Covid
    --
	Alexander Weinberg
	February 16, 2021
*/
/*******************************************************************************/

//________________________________________________________________
// Load the raw ASEC extract
use "../Raw Data/CPS/asec19_raw.dta", clear

//________________________________________________________________
// The survey asks about last year: shift age back by one year and give
// the last-year variables the short names used in the rest of the script
replace age = age - 1
rename occly      occ
rename ind90ly    ind90
rename uhrsworkly hours_week
rename wkswork1   weeks_ann

//________________________________________________________________
// Educ --> years of schooling (crosswalk), then potential experience.
// keep(match) retains only observations whose educ code is in the crosswalk.
merge m:1 educ using "../Raw Data/Crosswalks/educ_years_school.dta", keep(match) nogenerate

// Experience = age - schooling - 6, with schooling floored at 12 years
gen exp  = (age - max(years_school, 12)) - 6
gen exp2 = exp ^ 2

// Negative experience is dropped (a missing exp is not < 0, so it is kept)
drop if exp < 0

//________________________________________________________________
// Build the covariates used later in the regressions

// Turn the numeric missing codes into Stata missing values
replace incwage    = . if incwage >= 99999998		// wage and salary income missing
replace hours_week = . if hours_week == 999			// usual weekly hours missing

// Hourly wage = annual wage income / (weekly hours * weeks worked)
gen fedminwage = 7.25
gen hours_ann  = hours_week * weeks_ann
gen wage       = incwage / hours_ann

// 0/1 indicators
gen college       = (years_school >= 16)
gen married       = (marst == 1)					// married, spouse present
gen born_50states = (bpl == 9900)
gen is_citizen    = (citizen != 5)

// 0/1 indicators that are left missing on the listed source codes
// (the `if' qualifier leaves the excluded observations as missing)
gen no_unemp        = (wksunem1 == 0)         if wksunem1 != 99
gen own_home        = (ownershp == 10)        if ownershp != 0
gen fulltime        = (fullpart == 1)         if !inlist(fullpart, 0, 9)
gen employer_health = inrange(paidgh, 21, 22) if paidgh != 0

gen female = (sex == 2)
gen male   = (sex == 1)
gen white  = (race == 100) if race != 999

// 500+ employees; a missing firm size is counted as a small firm
gen big_firm = inlist(firmsize, 8, 9)
gen old      = (age >= 50)

// Wage deciles over observations with a non-missing wage;
// top50 flags deciles 6-10 and is missing whenever wage is missing
xtile inc_tile = wage if !mi(wage), nq(10)
gen top50      = (inc_tile >= 6) if !mi(wage)

//________________________________________________________________
// SAMPLE SELECTION - HPV RED
// NOTE: Stata treats missing as larger than any number, so the `<'
// filters below never drop observations with a missing wage or hours_ann,
// and `incwage > 0' is also true when incwage is missing.

// Sample A: valid occupation, age, sex and industry; no positive pay with zero hours
drop if occ == 0 | mi(occ)							// missing occupation
drop if mi(age)
drop if sex == 9 | mi(sex)
drop if (hours_week == 0) & (incwage > 0)
drop if inrange(ind90, 940, 998) | mi(ind90)		// drop military

// Sample B: ages 25-65, hourly wage at least half the federal minimum
keep if inrange(age, 25, 65)
drop if wage < fedminwage / 2

// Sample C: at least 260 annual hours
drop if hours_ann < 260

compress
save "../Data/CPS/clean_cps19", replace


/*----------------------------------------------------*/
			/* [>   Monthly CPS   <] */ 
/*----------------------------------------------------*/


/*
//________________________________________________________________
// Basic cleaning prior to upload to reduce file size
do "../input/ipums_cleaning_script_basicmonthlyCPS.do"
drop if mi(occ) | mi(age) | mi(race) | mi(sex)
keep if inrange(age, 25, 65)
keep if inlist(month, 2, 4, 8) // just feb & april & august
keep age race sex marst occ empstat labforce uhrsworkorg classwkr educ compwt ///
bpl citizen earnweek covid* month year uhrswork1

forvalues j = 2010(1)2020 {
	preserve 
		keep if year == `j'
		save "../input/basic_monthly_CPS_raw_`j'.dta", replace
	restore 		
} 
*/

//________________________________________________________________
// Stack the yearly basic-monthly files: start from 2010, append 2011-2020
use "../Raw Data/CPS/basic_monthly_CPS_raw_2010.dta", clear
forvalues yr = 2011/2020 {
	append using "../Raw Data/CPS/basic_monthly_CPS_raw_`yr'.dta"
}

//________________________________________________________________
// 2-DIGIT OCC CODE
// Collapse detailed occ codes into broad groups, stored in occ_2digit.
// Any code outside the listed ranges (including missing) becomes missing.
recode occ ///
	(10/430    =   1) ///
	(500/950   =   5) ///
	(1000/1240 =  10) ///
	(1300/1560 =  13) ///
	(1600/1965 =  16) ///
	(2000/2060 =  20) ///
	(2100/2160 =  21) ///
	(2200/2550 =  22) ///
	(2600/2960 =  26) ///
	(3000/3540 =  30) ///
	(3600/3655 =  36) ///
	(3700/3955 =  37) ///
	(4000/4160 =  40) ///
	(4200/4250 =  42) ///
	(4300/4650 =  43) ///
	(4700/4965 =  47) ///
	(5000/5940 =  50) ///
	(6005/6130 =  60) ///
	(6200/6940 =  62) ///
	(7000/7630 =  70) ///
	(7700/8965 =  77) ///
	(9000/9420 =  90) ///
	(9500/9750 =  95) ///
	(9800/9830 =  98) ///
	(9920      = 999) ///
	(else      =   .) ///
	, generate(occ_2digit)

// Value labels for the broad groups.
// NOTE: groups 98 and 999 produced by the recode have no label here, and
// labels 0 and 15 are defined although the recode never produces them.
label define occ_2_label ///
	 0 " " ///
	 1 "Manage." ///
	 5 "Busn./Finan." ///
	10 "Computer/Math" ///
	13 "Architecture/Engineering" ///
	15 "Technician" ///
	16 "Science" ///
	20 "Community/Social" ///
	21 "Legal" ///
	22 "Educ." ///
	26 "Entertainment/Media" ///
	30 "Health tech." ///
	36 "Health supp." ///
	37 "Protection" ///
	40 "Food prep." ///
	42 "Building/maintenance" ///
	43 "Personal care" ///
	47 "Sales" ///
	50 "Office/Admin." ///
	60 "Farm/Fish/Forest" ///
	62 "Construct./Extract." ///
	70 "Install/Maintenance/Repair" ///
	77 "Production" ///
	90 "Transport." ///
	95 "Material moving"
label values occ_2digit occ_2_label

//________________________________________________________________
// Education: attach years of schooling from the crosswalk;
// observations whose educ code is not in the crosswalk are dropped
merge m:1 educ using "../Raw Data/Crosswalks/educ_years_school.dta", keep(match) nogenerate

//________________________________________________________________
// Labor-force status indicators from empstat code ranges
// (a missing empstat yields 0 for all three)
gen unemployed = (empstat >= 20) & (empstat <= 22)
gen employed   = (empstat >= 10) & (empstat <= 12)	// not Military, not U, not NILF
gen nilf       = (empstat >= 30) & (empstat <= 36)

save "../Data/CPS/monthly_cps_2010_2020.dta", replace


/*----------------------------------------------------*/
			/* [>   COVID CPS   <] */ 
/*----------------------------------------------------*/


use "../Raw Data/CPS/raw_cps_covid", clear

// __________________________________________________
// TELEWORK
// reports whether the respondent teleworked or worked from home for pay during covid
// Source coding 1 = no, 2 = yes, 99 = missing  -->  0 = no, 1 = yes, . = missing.
// A single recode applies all rules to the original values at once, so the
// result does not depend on the order of the rules.
recode covidtelew (1 = 0) (2 = 1) (99 = .)

// Replace the shipped value label with one matching the new 0/1 coding.
// `capture' keeps the script running if the label is not present in the file.
capture label drop covidtelew_lbl
label define covidtelew_lbl 0 `"No"' 1 `"Yes"'
label values covidtelew covidtelew_lbl

// __________________________________________________
// UNABLE TO WORK
// reports whether the respondent was unable to work b/c of covid
// Same 1/2 --> 0/1 mapping as telework. Code 99 is now also set to missing,
// consistent with covidtelew; previously it was left in the data as the
// unlabeled numeric value 99 alongside the 0/1 answers.
// NOTE(review): assumes 99 is the missing / not-in-universe code for
// covidunaw as it is for covidtelew -- confirm against the IPUMS codebook.
recode covidunaw (1 = 0) (2 = 1) (99 = .)

capture label drop covidunaw_lbl
label define covidunaw_lbl 0 `"No"' 1 `"Yes"'
label values covidunaw covidunaw_lbl

// __________________________________________________
// Binary LWFH, HPP scores
// Keep only respondents whose occupation has O*NET-based scores
merge m:1 occ using "../Data/onet_occ_clean.dta", keep(match) nogenerate

// __________________________________________________
// ESSENTIAL WORKER MEASURE
// Keep every respondent; discard only occupations that appear solely in
// the essential-worker file. Respondents without a match keep a missing
// share_essential.
merge m:1 occ using "../Data/DHS/occ3digit_essential.dta", keep(master match) nogenerate

// Critical = more than 75% of the occupation is essential.
// Stata treats missing as larger than any number, so an unguarded
// (share_essential > 0.75) would flag every unmatched respondent as
// critical. Leave critical missing when the share is unknown.
gen critical = (share_essential > 0.75) if !mi(share_essential)

// __________________________________________________
// Min-max rescale pp and low_wfh to [0,1]:
// x --> (x - min) / (max - min), using the sample min and max of each variable
foreach score in pp low_wfh {
	summ `score'
	local hi = r(max)
	local lo = r(min)
	replace `score' = (`score' - `lo') / (`hi' - `lo')
}

save "../Data/CPS/cps_covid", replace

/*----------------------------------------------------*/
			/* [>   Recession   <] */ 
/*----------------------------------------------------*/

//________________________________________________________________
// Load the recession-period CPS extract
use "../Raw Data/CPS/recession_cps", clear

//________________________________________________________________
// Sample selection
// Months kept: Jan/Feb/Apr of 2008 and 2020, and January only of 2010
keep if inlist(year, 2008, 2010, 2020) & inlist(month, 1, 2, 4)
drop if year == 2010 & month != 1

// Occupation must be present, non-zero, and not military (9840)
drop if mi(occ) | inlist(occ, 0, 9840)

// Required demographics and usual hours must be non-missing
drop if mi(age, race, sex, uhrswork1)

// Ages 25-65, employed only (not Military, not U, not NILF)
keep if inrange(age, 25, 65)
keep if inrange(empstat, 10, 12)

//________________________________________________________________
// Employment indicator; equals 1 for every remaining observation
// because of the empstat filter just above
gen employed = inrange(empstat, 10, 12)

//________________________________________________________________
// Use occ2010 harmonized IPUMS codes for LWFH* and HPP*
// _merge is kept for now so crosswalk-only rows can be removed below.
merge m:1 occ2010 using "../Raw Data/Crosswalks/occ2010_lwfhpp"

// __________________________________________________
// Linearly scale pp and low_wfh to [0,1] using each variable's min and max.
// NOTE(review): the min/max are taken over all rows present after the merge,
// including occupations that appear only in the crosswalk. This matches the
// previous behavior, so the hard-coded cutoffs below keep their meaning;
// confirm whether scaling over matched respondents only was intended.
summ pp
local ppmax 	= r(max)
local ppmin 	= r(min)
replace pp  	= (pp - `ppmin') / (`ppmax' - `ppmin') // scale to [0,1]

summ  	low_wfh
local 	wfhmax 	= r(max)
local 	wfhmin 	= r(min)
replace low_wfh  	= (low_wfh - `wfhmin') / (`wfhmax' - `wfhmin') // scale to [0,1]

//________________________________________________________________
// BINARY VERSIONS OF THE VARIABLE
// 1 if the scaled score is above the cutoff, 0 if at or below it, and
// missing when the score itself is missing. Stata treats missing as larger
// than any number, so an unguarded `> cutoff' would set the binary to 1 for
// respondents with no crosswalk match and they would never be dropped.
gen low_wfh_binary = (low_wfh > 0.1667)     if !mi(low_wfh)
gen high_pp_binary = (pp      > 0.58632892) if !mi(pp)

// Remove rows that exist only in the crosswalk (no CPS respondent attached),
// then respondents without both binary measures.
drop if _merge == 2
drop _merge
drop if mi(low_wfh_binary) | mi(high_pp_binary)

save "../Data/CPS/recession_clean_cps", replace


// end
